** This program performs decomposition of differences in survival **
** Between races ** 

* ------------------------------------------------------------------
* Session setup
* ------------------------------------------------------------------
* All relative paths used further down are resolved against this folder.
cd "/disk/agedisk3/medicare.work/chandra-DUA52080/pragya-dua52080/replication/"

* Close a log left open by an earlier run (if any), then start a new one.
capture log close
log using primary_decomps.log, replace

* version 14.0 is needed for the putexcel calls that export the results.
version 14.0
set more off
set matsize 1000

* ------------------------------------------------------------------
* Run configuration
* ------------------------------------------------------------------
* Sample to work from: 0001, 05, 20 or 100.
local sampsize "100"

* Every measure handled by the program.
global MEASURES_ALL "survrsw survrsb survw2 stemi surv survre survi sars snra cath_pci cardtech CABG cath_pci_ad survd surv_b_qual surv_w_qual"

* Hospital samples on which the decomposition is done:
*   All      = all hospitals appearing in the cross section for which the between is done
*   Constant = only hospitals appearing in both period 1 and period 4
* The constant sample is kept because it is the one used for the difference-in-difference.
global SAMPLE "All Constant"

* Markets included.
global MARKETS "All Below_MA_median"

* Number of bootstrap draws.
global NBOOT 1000

* ------------------------------------------------------------------
* Step switches: the code of a step runs only when its switch equals 1
* ------------------------------------------------------------------
* Makes QB and QW for the unbalanced analysis.
global QB_QW 1

* Makes the hospital-level decomposition data.
global MAKEDATA 1

* Generates the bootstrap samples.
global GEN_BOOTSAMPLE 1

* Static decomposition.
global STATIC_DECOMP 1

* Dynamic decomposition.
global DYNAMIC_DECOMP 1

* Calculates endline and baseline levels.
global LEVELS 1

************************************************************************************************************************************
******************************************** CODE START *****************************************************************************
************************************************************************************************************************************

** STEP 0: Builds surv_b_qual and surv_w_qual (Q_B and Q_W) for the unbalanced analysis
** Result: Intermediate_Output_Not_Exportable/qb_qw.dta, one row per provider (pn) and period (yidx)

	if $QB_QW == 1 {

		** One pass per period; each pass leaves a provider-level tempfile behind
		foreach period in 1 4 {

			estimates clear

			** Provider performance score for this period
			clear
			use pn yidx longsurv_ami_fere_a if yidx== `period' using  Intermediate_Output_Not_Exportable/bs0.dta, replace
			tempfile hosp_score
			save `hosp_score'

			** Risk-adjustment data for the same period, in-sample rows only
			clear
			use * if yidx == `period' & insample_ami_a == 1 using  Intermediate_Output_Not_Exportable/pre_quality_calc_ami100_a_surv.dta, replace

			** Replaces the score with the one taken from bs0.
			** Unmatched "using" rows are tolerated because the hospital file can hold hospitals
			** without a valid quality measure while the risk-adjustment file cannot;
			** keep(match) then discards them.
			drop longsurv_ami_fere_a
			merge m:1 pn using `hosp_score', assert(match using) keep(match) nogenerate

			** Stored estimates for this period
			estimates use  Exportable_Results/estimates/longsurv_ami100_yidx`period'_a.ster
			estimates table
			matrix coef = r(coef)

			** Linear prediction
			predict surv_predict, xb

			** Sanity check: the mean prediction has to agree with the mean of surv_30 (tolerance 1e-8)
			summarize surv_30
			local mean_actual = r(mean)
			summarize surv_predict
			local mean_fitted = r(mean)
			assert `mean_actual'> `mean_fitted' - .00000001 & `mean_actual'< `mean_fitted' + .00000001

			** Hospital fixed effect (or random effect) net of the constant term, one copy per race group
			foreach race in w b {
				generate hosp_fere_`race' = longsurv_ami_fere_a - _b[_cons]
			}
			assert hosp_fere_w != . & hosp_fere_b != .

			** Mean linear prediction among rows flagged b, then among rows flagged w,
			** written as a constant on every row
			foreach race in b w {
				summarize surv_predict if `race'
				local xb_mean = r(mean)
				generate surv_`race'_XB = `xb_mean'
			}

			** One row per provider: group mean prediction plus hospital effect
			collapse (first) surv_b_XB  hosp_fere_b surv_w_XB  hosp_fere_w, by(pn)
			generate surv_b_qual = surv_b_XB + hosp_fere_b
			generate surv_w_qual = surv_w_XB + hosp_fere_w
			generate yidx = `period'
			keep pn surv_b_qual surv_w_qual yidx
			tempfile qual_p`period'
			save `qual_p`period''

		}

		** Stacks the two periods and saves the result
		clear
		use `qual_p1'
		append using `qual_p4'
		save  Intermediate_Output_Not_Exportable/qb_qw.dta, replace
	}






** STEP 1: Make a dataset for each measure that is at the hospital level & has market share and quality in periods 1 & 4  **
**
** Output: Intermediate_Output_Not_Exportable/decomp_file_ami100_<meas>_<samp>_<mkts>.dta
** with one row per provider (pn) and, in wide form for periods 1 and 4, the quality
** measure (measure1, measure4) and the provider shares of patients (master_mktsh_*).

if $MAKEDATA == 1{
foreach meas in $MEASURES_ALL{
foreach samp in $SAMPLE{
foreach mkts in $MARKETS{
 
	** condition 1 is for unbalanced panel
	** condition 2 is for below median MA
	** condition 3 is for all the other outcomes on constant panel 
	if ((("`meas'" == "surv_b_qual" | "`meas'" == "surv_w_qual" | "`meas'" == "surv") & "`samp'" == "All" & "`mkts'" == "All") | ("`meas'" == "surv" & "`samp'" == "Constant" & "`mkts'" == "Below_MA_median") | ("`samp'" == "Constant" & "`mkts'" == "All")) {
		
		** Provider-by-period rows for periods 1 and 4 with a non-missing survival score
		clear
		use  * if (yidx == 1 | yidx == 4) & longsurv_ami_fere_a != . using  Intermediate_Output_Not_Exportable/bs0.dta, replace				
		** Adds surv_b_qual and surv_w_qual; assert(match) stops the run unless every row matches on both sides
		merge 1:1 pn yidx using  Intermediate_Output_Not_Exportable/qb_qw.dta, assert(match) nogenerate
		
		** The two qual measures are read under their own name, every other measure from long<meas>_ami_fere_a
		if "`meas'" == "surv_b_qual" | "`meas'" == "surv_w_qual"{
			generate measure = `meas'			
		}
		else {
			generate measure = long`meas'_ami_fere_a	
		} 
		keep if measure != . 
		
		** Drops providers in markets with > median MA penetration if appropriate
		** NOTE(review): .29605 is hard-coded, presumably the median of ma_ptcp_pn - confirm.
		** Missing values compare as larger than any number in Stata, so rows with a missing
		** ma_ptcp_pn are dropped here as well - confirm that this is intended.
		drop if (ma_ptcp_pn >=.29605) & "`mkts'" == "Below_MA_median"
		
		** Limit to the balanced panel if necessary: a provider left with a single row
		** (only one of the two periods) is removed
			if "`samp'" == "Constant"{
				bysort pn: gen obs_num = _N
				drop if obs_num == 1 
			}
		
		** Generates a master patient count variable
		** this is just renaming to facilitate easier calculation of shares
		** (the source columns differ by market definition; group a only has the act count)
		
			foreach group in w b a {
				if "`mkts'" == "All"{
					generate master_pat_count_`group'_act = npatients_`group'_ami_tot_act
					if "`group'" != "a"{
						generate master_pat_count_`group'_wgt = npatients_`group'_ami_tot_wgt
						generate master_pat_count_`group'_wgtH = npatients_`group'_ami_tot_wgtH
					}
				}	
						
				if "`mkts'" == "Below_MA_median"{
					generate master_pat_count_`group'_act = npatients_`group'_ami_tot_Mb
					if "`group'" != "a"{			
						generate master_pat_count_`group'_wgt = npatients_`group'_ami_tot_wgtMb
						generate master_pat_count_`group'_wgtH = npatients_`group'_ami_tot_wgtHMb	
					}						
				}
			}
		
		** Share of each provider in the period total of every patient count.
		** egen total() is the current name of the older sum() function; bysort makes each
		** call carry its own sort instead of depending on a sort done elsewhere.
			foreach type in act wgt wgtH{
				foreach group in w b {
					bysort yidx: egen master_total_pat_count_`group'_`type' = total(master_pat_count_`group'_`type')
					generate master_mktsh_`group'_`type' = master_pat_count_`group'_`type'/master_total_pat_count_`group'_`type'
				}
			}
			bysort yidx: egen master_total_pat_count_a_act = total(master_pat_count_a_act)
			generate master_mktsh_a_act = master_pat_count_a_act/master_total_pat_count_a_act
			
		
		** Limits to important variables
			keep pn yidx master_mktsh_b* master_mktsh_w* master_mktsh_a* measure
		
		** Reshapes wide: one row per provider, period suffix 1 or 4 on every variable
			reshape wide master_mktsh* measure, i(pn) j(yidx)
		
		** Saves data
	
		save  Intermediate_Output_Not_Exportable/decomp_file_ami100_`meas'_`samp'_`mkts'.dta, replace

}
}
}
}
}



if $GEN_BOOTSAMPLE == 1{
** Step 2: Generates bootstrap samples
**
** For every measure x sample x market combination built in Step 1, writes NBOOT + 1 provider lists to
**   Intermediate_Output_Not_Exportable/<meas>_bootstrap_samps/decomp_file_ami100_<meas>_<samp>_<mkts>_bs<idx>_provids.dta
** Index 0 is the provider list as it stands; indices 1..NBOOT are draws with replacement of the same size.
**
** NOTE(review): no set seed command is visible in this file, so the draws depend on the
** state of the random-number generator when this step starts - confirm whether a fixed seed is wanted.
foreach meas in $MEASURES_ALL{
foreach samp in $SAMPLE{
foreach mkts in $MARKETS{

	if ((("`meas'" == "surv_b_qual" | "`meas'" == "surv_w_qual" | "`meas'" == "surv") & "`samp'" == "All" & "`mkts'" == "All") | ("`meas'" == "surv" & "`samp'" == "Constant" & "`mkts'" == "Below_MA_median") | ("`samp'" == "Constant" & "`mkts'" == "All")) {
		
		** The output folder does not depend on the draw: create it once, not once per draw.
		** capture noisily lets the run continue when the folder is already there.
		capture noisily mkdir Intermediate_Output_Not_Exportable/`meas'_bootstrap_samps/

		** The provider list is the same for every draw: read and check it once
		use pn using  Intermediate_Output_Not_Exportable/decomp_file_ami100_`meas'_`samp'_`mkts'.dta, replace
		isid pn			
		count
		local sample = r(N)

		forvalues bsidx=0/$NBOOT{
			
			** preserve and restore bring the full provider list back after each draw
			preserve

			if `bsidx' != 0 {
				bsample `sample'
			}
			
			save  Intermediate_Output_Not_Exportable/`meas'_bootstrap_samps/decomp_file_ami100_`meas'_`samp'_`mkts'_bs`bsidx'_provids.dta, replace

			restore
		}

	}
}
}
}
}


if $STATIC_DECOMP == 1{

** Step 3: Does static decomp
**
** For each selected measure x sample x market combination, and separately for periods 1 and 4,
** splits the b-minus-w gap in share-weighted quality (labelled "Between-Race Gap (Black - White)"
** in the output) into three additive parts, once on the original provider list (draw 0) and once
** per bootstrap draw. Point estimates come from draw 0, standard errors are the standard deviation
** across draws 1..NBOOT. Results go to
**   Exportable_Results/Static_decompositions/static_<meas>_<samp>_<mkts>.xlsx
**
** Combinations processed: surv on All/All, surv on Constant/Below_MA_median, and every measure
** other than surv_b_qual and surv_w_qual on Constant/All.
foreach meas in $MEASURES_ALL{
foreach samp in $SAMPLE{
foreach mkts in $MARKETS{
	if (("`meas'" == "surv" & "`samp'" == "All" & "`mkts'" == "All") | ("`meas'" == "surv" & "`samp'" == "Constant" & "`mkts'" == "Below_MA_median") | ("`samp'" == "Constant" & "`mkts'" == "All" & ("`meas'" != "surv_b_qual" & "`meas'" != "surv_w_qual"))) {
	
	foreach yidx in 1 4{	
	** Pulls point estimates for each bootstrap sample
	forvalues bsidx=0/$NBOOT{	
		clear
		use  Intermediate_Output_Not_Exportable/`meas'_bootstrap_samps/decomp_file_ami100_`meas'_`samp'_`mkts'_bs`bsidx'_provids.dta, replace
		** m:1 because a provider can appear several times in a bootstrap draw
		merge m:1 pn using  Intermediate_Output_Not_Exportable/decomp_file_ami100_`meas'_`samp'_`mkts'.dta, keep(match) nogenerate
			
		** Renames variables
		** (the row number becomes the id, so repeated providers stay separate rows in the reshape)
		generate pn_postboot = _n 
		drop pn
		
		** keeps relevant period
		keep measure* master_mktsh_*  pn_postboot 
		reshape long measure master_mktsh_b_act master_mktsh_w_act master_mktsh_a_act master_mktsh_b_wgt master_mktsh_w_wgt  master_mktsh_b_wgtH master_mktsh_w_wgtH , i(pn_postboot) j(yidx)
		keep if yidx == `yidx'
		** Rows without an all-patient share in this period are removed
		** (presumably providers not present in the period - confirm)
		drop if master_mktsh_a_act == . 
					
		** RECALIBRATE MARKET SHARES AFTER HOSPITAL EXCLUSIONS 
		** Each share column is divided by its own sum over the rows that remain
		foreach type in act wgt wgtH{
		foreach group in b w {
				
			summarize master_mktsh_`group'_`type'
			local tot_share = r(sum)
			replace master_mktsh_`group'_`type' = master_mktsh_`group'_`type' / `tot_share'

		}
		}	
	
		** Does decomp ** 
		** Per-row contributions, multiplied by 100:
		**   disparity_all            = (b_act  - w_act)  * measure   total gap
		**   between_market           = (w_wgtH - w_act)  * measure   reported as "Between Hospital Markets"
		**   within_market_across_zip = (w_wgt  - w_wgtH) * measure   reported as "Between Zip Codes"
		**   within_zip               = (b_act  - w_wgt)  * measure   reported as "Within Zip Codes"
		** The three parts telescope to disparity_all, which the assert after the collapse checks.
			local multiplier = 100
			generate disparity_all = (master_mktsh_b_act - master_mktsh_w_act)*(measure)*`multiplier'				
			generate within_market_across_zip = (master_mktsh_w_wgt -master_mktsh_w_wgtH)*(measure)*`multiplier'	
			generate between_market = (master_mktsh_w_wgtH - master_mktsh_w_act)*(measure)*`multiplier'	
			generate within_zip = (master_mktsh_b_act - master_mktsh_w_wgt)*(measure)*`multiplier'
			** sample is 1 on every row, so its sum is the number of rows in this draw and period
			generate sample = 1 
	
		collapse (sum) disparity_all between_market within_market_across_zip within_zip sample, by(yidx)
		generate bootsample = `bsidx'
		** sanity check
		assert abs(disparity_all - (between_market + within_market_across_zip + within_zip)) < .0001

		** One single-row tempfile per draw and period
		tempfile bs`bsidx'_yidx`yidx'
		save `bs`bsidx'_yidx`yidx''

	}
	}
	
	** estimates & sample in baseline & endline & difference
	** (draw 0 only; locals m_<var>1, m_<var>4 and d_<var>)
		use `bs0_yidx1', replace
		append using `bs0_yidx4'
		foreach var in disparity_all between_market within_market_across_zip within_zip sample{
			summarize `var' if yidx == 1
			local m_`var'1 = r(mean)
			summarize `var' if yidx == 4
			local m_`var'4 = r(mean)
			local d_`var' = `m_`var'4' - `m_`var'1'
		}
	
	** standard errors for baseline, endline & difference
	** (standard deviation across draws 1..NBOOT; no p-values are computed in this block)
	** First stacks the period-1 rows of all draws and gives the variables suffix 1
		use `bs1_yidx1', replace
		forvalues bsidx=2/$NBOOT{	
			append using `bs`bsidx'_yidx1'
		}	
		foreach var in disparity_all between_market within_market_across_zip within_zip sample{
			rename `var' `var'1
		}
		
		** Then brings in the period-4 row of each draw, one merge per draw: the first merge
		** adds the unsuffixed variables, later merges fill their missing values through update
		drop yidx
		forvalues bsidx=1/$NBOOT{	
			merge 1:1 bootsample using `bs`bsidx'_yidx4', update nogenerate
		}	
		
		** Suffix 4 for the period-4 values and d_ for the change from period 1 to period 4
		foreach var in disparity_all between_market within_market_across_zip within_zip sample {
			rename `var' `var'4
			generate d_`var' = `var'4 - `var'1
		}
				
		foreach var in disparity_all1 between_market1 within_market_across_zip1 within_zip1 disparity_all4 between_market4 within_market_across_zip4 within_zip4 d_disparity_all d_between_market d_within_market_across_zip d_within_zip{
			summarize `var' 
			local se_`var' = r(sd)
		}
	
		** Writes the table; every value is passed as a quoted macro, columns are
		** Baseline (B), Endline (C) and Difference (D). capture noisily lets the run
		** continue when the folder already exists.
		capture noisily mkdir Exportable_Results/Static_decompositions/
		putexcel A2 = ("Between-Race Gap (Black - White)") A3 = ("  SE") A4= ("Between Hospital Markets") A5 = ("  SE") A6 =("Between Zip Codes") A7 = ("  SE")  A8=("Within Zip Codes") A9 = ("  SE") A10 = ("Sample") B1 = ("Baseline") C1 = ("Endline") D1 = ("Difference") B2 = ("`m_disparity_all1'") B3 = ("`se_disparity_all1'") B4 = ("`m_between_market1'") B5 =("`se_between_market1'") B6 = ("`m_within_market_across_zip1'") B7 = ("`se_within_market_across_zip1'") B8 = ("`m_within_zip1'") B9 = ("`se_within_zip1'") B10 = ("`m_sample1'") C2 = ("`m_disparity_all4'") C3 = ("`se_disparity_all4'") C4 = ("`m_between_market4'") C5 =("`se_between_market4'") C6 = ("`m_within_market_across_zip4'") C7 = ("`se_within_market_across_zip4'") C8 = ("`m_within_zip4'") C9 = ("`se_within_zip4'") C10 = ("`m_sample4'")   D2 = ("`d_disparity_all'") D3 = ("`se_d_disparity_all'") D4 = ("`d_between_market'") D5 =("`se_d_between_market'") D6 = ("`d_within_market_across_zip'") D7 = ("`se_d_within_market_across_zip'") D8 = ("`d_within_zip'") D9 = ("`se_d_within_zip'") using  Exportable_Results/Static_decompositions/static_`meas'_`samp'_`mkts'.xlsx, replace		
	
}
}
}
}
}





if $DYNAMIC_DECOMP == 1{
** Step 4: Does dynamic decomp
**
** For each selected measure x sample x market combination, splits the change between period 1
** and period 4 in the b-minus-w gap in share-weighted quality into additive parts:
**   pure_qual_imp      reported as "Differential Performance Improvement"
**   pure_reallocation  reported as "Differential Hospital Reallocation"
**   first_cross        reported as "Cross"
**   first_entry / first_exit (All sample only), reported as "Entry" and "Exit"
** Point estimates come from draw 0, standard errors are the standard deviation across
** draws 1..NBOOT. Results go to
**   Exportable_Results/Dynamic_decompositions/dynamic_<meas>_<samp>_<mkts>.xlsx
**
** Combinations processed: surv_b_qual and surv_w_qual on All/All, surv on
** Constant/Below_MA_median, and every other measure on Constant/All.

foreach meas in $MEASURES_ALL{
foreach samp in $SAMPLE{
foreach mkts in $MARKETS{
	if ((("`meas'" == "surv_b_qual" | "`meas'" == "surv_w_qual") & "`samp'" == "All" & "`mkts'" == "All") | ("`meas'" == "surv" & "`samp'" == "Constant" & "`mkts'" == "Below_MA_median") | ("`samp'" == "Constant" & "`mkts'" == "All" & ("`meas'" != "surv_b_qual" & "`meas'" != "surv_w_qual"))) {
	
	
		** Pulls point estimates for each bootstrap sample
		forvalues bsidx=0/$NBOOT{	
			clear
			use  Intermediate_Output_Not_Exportable/`meas'_bootstrap_samps/decomp_file_ami100_`meas'_`samp'_`mkts'_bs`bsidx'_provids.dta, replace
			** m:1 because a provider can appear several times in a bootstrap draw
			merge m:1 pn using  Intermediate_Output_Not_Exportable/decomp_file_ami100_`meas'_`samp'_`mkts'.dta, keep(match) nogenerate
	
			** The row number replaces pn as identifier
			generate pn_postboot = _n 
			drop pn
			
			** keeps relevant variables
			keep pn_postboot master_mktsh_b_act* master_mktsh_w_act* master_mktsh_a_act* measure*
	
			** RECALIBRATE MARKET SHARES AFTER HOSPITAL EXCLUSIONS 
			** Each share column is divided by its own sum over the rows of this draw
			foreach yidx in 1 4 {
			foreach group in b w a{
		
				summarize master_mktsh_`group'_act`yidx'
				local tot_share = r(sum)
				replace master_mktsh_`group'_act`yidx' = master_mktsh_`group'_act`yidx' / `tot_share'

			}
			}	
			
			local multiplier = 100	
	
			
			** does decomp
				** Balanced panel: every provider has both periods
				if "`samp'" == "Constant" {
					generate did = ((master_mktsh_b_act4-master_mktsh_w_act4)*measure4 - (master_mktsh_b_act1-master_mktsh_w_act1)*measure1)*`multiplier'
					generate pure_reallocation = measure1*((master_mktsh_b_act4 - master_mktsh_b_act1)-(master_mktsh_w_act4 - master_mktsh_w_act1))*`multiplier'	
					generate first_cross = ((master_mktsh_b_act4 - master_mktsh_w_act4)-(master_mktsh_b_act1 - master_mktsh_w_act1))*(measure4 - measure1)*`multiplier' 
					generate pure_qual_imp = (master_mktsh_b_act1 - master_mktsh_w_act1)*(measure4 - measure1)*`multiplier'
					** sample is 1 on every row, so its sum is the number of rows in this draw
					generate sample = 1

					collapse (sum) did pure_reallocation first_cross pure_qual_imp sample
					** The parts have to add up to did, within a relative tolerance of 1/98
					assert abs(did -  (pure_reallocation + first_cross + pure_qual_imp)) <= abs(did / 98)		
				}
	
				** Unbalanced panel: providers can enter or exit between the periods
				if "`samp'" == "All"{
					** Generates indicators for which part of sample hospital is in 
					gen byte balanced_samp = measure1 != . & measure4 != . 
					gen byte entry_samp =  measure1 == . & measure4 != .
					gen byte exit_samp =  measure1 != . & measure4 == .
		
					** generates local for average quality in baseline
					** (sum of measure1 weighted by the all-patient share in period 1)
					generate weight_measure1 = measure1*master_mktsh_a_act1
					summarize weight_measure1 
					local avg_quality_baseline_a = r(sum)
					
					
					** Generates market share weighted hospital quality in all periods
					** The indicators are written out in full so that the conditions do not
					** depend on variable-name abbreviation being switched on
		
					generate did_balance = ((master_mktsh_b_act4-master_mktsh_w_act4)*measure4 - (master_mktsh_b_act1-master_mktsh_w_act1)*measure1)*`multiplier'  if balanced_samp
					generate did_entry = (master_mktsh_b_act4-master_mktsh_w_act4)*measure4*`multiplier'  if entry_samp
					generate did_exit = - (master_mktsh_b_act1-master_mktsh_w_act1)*measure1*`multiplier'  if exit_samp
					
					
					** Components; quality is taken relative to the baseline average in the
					** reallocation, entry and exit terms
					generate pure_reallocation = (measure1 - `avg_quality_baseline_a')*((master_mktsh_b_act4 - master_mktsh_b_act1)-(master_mktsh_w_act4 - master_mktsh_w_act1))*`multiplier' if balanced_samp
					generate first_cross = ((master_mktsh_b_act4 - master_mktsh_w_act4)-(master_mktsh_b_act1 - master_mktsh_w_act1))*(measure4 - measure1)*`multiplier'  if balanced_samp
					generate pure_qual_imp = (master_mktsh_b_act1 - master_mktsh_w_act1)*(measure4 - measure1)*`multiplier'  if balanced_samp
					generate first_entry = (master_mktsh_b_act4 - master_mktsh_w_act4)*(measure4 - `avg_quality_baseline_a')*`multiplier'  if entry_samp
					generate first_exit = (master_mktsh_b_act1 - master_mktsh_w_act1)*(measure1 - `avg_quality_baseline_a')*`multiplier'  if exit_samp				
					generate sample = 1

					collapse (sum) did_balance did_entry did_exit pure_reallocation first_cross pure_qual_imp first_entry first_exit sample
					** For really small diff_in_diff values there are rounding issues. Hence this workaround
					** (relative tolerance 1/98, loosened to 1/10 when did is at most .000001 in absolute value)
					generate did = did_balance + did_entry + did_exit
					generate threshold = abs(did / 98) if abs(did) > .000001
					replace threshold = abs(did /10) if threshold == .  		
					** first_exit enters with a minus sign
					assert abs(did -  (pure_reallocation + first_cross  + pure_qual_imp + first_entry  - first_exit )) <= threshold	
					drop threshold did_balance did_entry did_exit					
				}
	
				** One single-row tempfile per draw
				tempfile bs`bsidx'
				save `bs`bsidx''
		}
		
		** Exports results
		** first gets main estimate
		
		if "`samp'" == "Constant"{
			local varlist did pure_reallocation first_cross pure_qual_imp sample
		}		
		if "`samp'" == "All"{
			local varlist did pure_reallocation first_cross pure_qual_imp first_entry first_exit sample
		}
		
		** Draw 0 is the original provider list: its values are the point estimates
		clear
		use `bs0', replace
		foreach var in `varlist'{
			summarize `var'
			local m_`var' = r(mean)
		}
		
		
		** Pulls standard errors: standard deviation across draws 1..NBOOT
		** (no p-values are computed in this block)
		
		use `bs1', replace
		forvalues bsidx=2/$NBOOT{	
			append using `bs`bsidx''
		}	
		
		foreach var in `varlist'{
			summarize `var' 
			local se_`var' = r(sd)
		}
	
		** Writes the table; every value is passed as a quoted macro. capture noisily lets
		** the run continue when the folder already exists.
		capture noisily mkdir Exportable_Results/Dynamic_decompositions/
		if "`samp'" == "Constant"{
			putexcel A2 = ("Change in Between-Race Gap from Baseline to Endline") A3= ("  SE") ///
				A4 = ("Differential Performance Improvement") A5 = (" SE") ///
				A6 = (" Differential Hospital Reallocation") A7 = (" SE") ///
				A8 = ("Cross") A9 = (" SE") A10 = ("Sample") B1 = ("Value") ///
				B2 = ("`m_did'") B3= ("`se_did'") ///
				B4 = ("`m_pure_qual_imp'") B5 = ("`se_pure_qual_imp'") ///
				B6 = ("`m_pure_reallocation'") B7 = ("`se_pure_reallocation'") ///
				B8 = ("`m_first_cross'") B9 = ("`se_first_cross'") ///
				B10 = ("`m_sample'") ///
				using  Exportable_Results/Dynamic_decompositions/dynamic_`meas'_`samp'_`mkts'.xlsx, replace
		}
		if "`samp'" == "All"{
			putexcel A2 = ("Change in Between-Race Gap from Baseline to Endline") A3= ("  SE") ///
				A4 = ("Differential Performance Improvement") A5 = (" SE") ///
				A6 = (" Differential Hospital Reallocation") A7 = (" SE") ///
				A8 = ("Cross") A9 = (" SE") A10 = ("Entry") A11 = (" SE") ///
				A12 = ("Exit") A13 = (" SE") A14 = ("Sample") B1 = ("Value") ///
				B2 = ("`m_did'") B3= ("`se_did'") ///
				B4 = ("`m_pure_qual_imp'") B5 = ("`se_pure_qual_imp'") ///
				B6 = ("`m_pure_reallocation'") B7 = ("`se_pure_reallocation'") ///
				B8 = ("`m_first_cross'") B9 = ("`se_first_cross'") ///
				B10 = ("`m_first_entry'") B11 = ("`se_first_entry'") ///
				B12 = ("`m_first_exit'") B13 = ("`se_first_exit'") ///
				B14 = ("`m_sample'") ///
				using  Exportable_Results/Dynamic_decompositions/dynamic_`meas'_`samp'_`mkts'.xlsx, replace
		}
	
}
}
}
}
}



if $LEVELS ==1 {
** Step 5: Gets the levels for  measures across all patients
** Computes four measures only: cath_pci, cath_pci_ad, cardtech and CABG, written to Means.xlsx
** as "Cardiac Cath (Same-Day)", "Cardiac Cath (Any day)", "Cath or CABG (Any Day)" and
** "CABG (Any Day)". Per the original author note these are the ones shown in Table 4 and
** Appendix Table A6.
**
** A level is the sum over providers of all-patient share times measure, for period 1
** (Baseline) and period 4 (Endline), stored in the local <meas>_<period>.


	foreach mkts in $MARKETS{
	foreach yidx in 1 4{
	foreach meas in cath_pci cardtech CABG cath_pci_ad {
	foreach samp in $SAMPLE{
	** For these four measures the levels come from the Constant sample on All markets only
	if ("`samp'" == "Constant" & "`mkts'" == "All") {
		
			clear
			use  Intermediate_Output_Not_Exportable/decomp_file_ami100_`meas'_`samp'_`mkts'.dta, replace			
			generate weight`yidx' = master_mktsh_a_act`yidx'*measure`yidx'
			summarize weight`yidx'
			local `meas'_`yidx' = r(sum)	
		
		}
	}
	}
	}
	}
	
	** Writes the table; every value is passed as a quoted macro. capture noisily lets
	** the run continue when the folder already exists.
	capture noisily mkdir Exportable_Results/Means/
	putexcel A2 = ("Cardiac Cath (Same-Day)") A3 =("Cardiac Cath (Any day)") ///
		A4 = ("Cath or CABG (Any Day)") A5 =("CABG (Any Day)") ///
		B1 = ("Baseline") C1 = ("Endline") ///
		B2 = ("`cath_pci_1'") C2 = ("`cath_pci_4'") ///
		B3 = ("`cath_pci_ad_1'") C3 = ("`cath_pci_ad_4'") ///
		B4 = ("`cardtech_1'") C4 = ("`cardtech_4'") ///
		B5 = ("`CABG_1'") C5 = ("`CABG_4'") ///
		using  Exportable_Results/Means/Means.xlsx, replace
	
}	




** Closes the log opened at the top of the program (primary_decomps.log)
log close




 	